## Vito D'Orazio
## plos_bivariate_monthlyANDucdp_111914.r

## inputs: monthly_seq_analysis.dta or seq_analysis_ucdp_111914.dta

library(foreign)
library(VGAM)
library(ggplot2)
library(extrafont)
library(xtable)

rm(list=ls())

setwd("/Users/vjdorazio/Desktop/Sequence_Analysis_paper/plos_version/data")

# IF monthly
#UCDP <- FALSE
#mydata <- read.dta("monthly_seq_analysis.dta")

#myformula <- formula(cbind(y1,y2) ~ Euc_A_Verb_Coop + Euc_A_Mater_Coop + Euc_A_Verb_Confl + Euc_A_Mater_Confl + Euc_B_Verb_Coop + Euc_B_Mater_Coop + Euc_B_Verb_Confl + Euc_B_Mater_Confl + Other_Crisis)

# IF UCDP...

UCDP <- TRUE
mydata <- read.dta("seq_analysis_ucdp_111914.dta")
myformula <- formula(cbind(y1,y2) ~ Euc_A_Verb_Coop + Euc_A_Mater_Coop + Euc_A_Verb_Confl + Euc_A_Mater_Confl +
Euc_B_Verb_Coop + Euc_B_Mater_Coop + Euc_B_Verb_Confl + Euc_B_Mater_Confl)


y1 <- vector(mode="numeric",length=nrow(mydata))
y2 <-vector(mode="numeric",length=nrow(mydata))
mydata <- cbind(mydata,y1,y2)
mydata$y1[mydata$Dyad_Type==3] <- 1 #y1 conflict
mydata$y2[mydata$Dyad_Type==3] <- 1 #y2 conflict
mydata$y1[mydata$Dyad_Type==2] <- 1 #y1 conflict
mydata$y2[mydata$Dyad_Type==1] <- 1 #y2 conflict

mydata$Dyad_Type[mydata$y1==0 & mydata$y2==0] <- 0
mydata$Dyad_Type[(mydata$y1==1 & mydata$y2==0)|mydata$y1==0 & mydata$y2==1] <- 1
mydata$Dyad_Type[mydata$y1==1 & mydata$y2==1] <- 2

mydata$Euc_A_Verb_Coop <- as.numeric(mydata$Euc_A_Verb_Coop) 
mydata$Euc_A_Mater_Coop <- as.numeric(mydata$Euc_A_Mater_Coop)
mydata$Euc_A_Verb_Confl <- as.numeric(mydata$Euc_A_Verb_Confl)
mydata$Euc_A_Mater_Confl <- as.numeric(mydata$Euc_A_Mater_Confl)
mydata$Euc_B_Verb_Coop <- as.numeric(mydata$Euc_B_Verb_Coop)
mydata$Euc_B_Mater_Coop <- as.numeric(mydata$Euc_B_Mater_Coop)
mydata$Euc_B_Verb_Confl <- as.numeric(mydata$Euc_B_Verb_Confl)
mydata$Euc_B_Mater_Confl <- as.numeric(mydata$Euc_B_Mater_Confl)

if(!UCDP){mydata$Other_Crisis <- as.numeric(mydata$Other_Crisis)}

mydata <- cbind(mydata,1)

mydata$Dyad_Type <- as.factor(mydata$Dyad_Type)
data.0 <- mydata[which(mydata$Dyad_Type==0),]
data.1 <- mydata[which(mydata$Dyad_Type==1),]
data.2 <- mydata[which(mydata$Dyad_Type==2),]


#################################
## using VGAM for bivariate logit

# first we need a variable for training data
set.seed(4422)
train <- vector(mode="numeric",length=nrow(mydata))
for (i in 1:nrow(mydata)) {
    if(runif(1)>=0.5) {
        train[i] <- TRUE
    }
}

mydata <- cbind(mydata,train)


train.data <- mydata[which(mydata$train==TRUE),]
pred.data <- mydata[which(mydata$train==FALSE),]

## Models

# Euclidean
euc.fit = vglm(myformula,
    binom2.or(exchangeable=TRUE), data=train.data)

euc.pred.fit <- predict(euc.fit,newdata=pred.data,type="response")


#######################
## Clearly not the cleanest way to do this...
## Our three predicted probabilities for each model
# Euclidean
Euc_pred_cat_0 <- vector(mode="integer",length=nrow(pred.data))
Euc_pred_cat_1 <- vector(mode="integer",length=nrow(pred.data))
Euc_pred_cat_2 <- vector(mode="integer",length=nrow(pred.data))

euc.0 <- euc.pred.fit[,1]
euc.1 <- euc.pred.fit[,2]+euc.pred.fit[,3]
euc.2 <- euc.pred.fit[,4]

Euc_pred_cat_0[euc.0 > euc.1 & euc.0 > euc.2] <- 1
Euc_pred_cat_1[euc.1 > euc.0 &  euc.1 > euc.2] <- 1
Euc_pred_cat_2[euc.2 > euc.0 &  euc.2 > euc.1] <- 1


############################


## NOTE: mydata is redefined so that I can reuse code from another test
rm(mydata)
mydata <- pred.data
mydata <- cbind(mydata,Euc_pred_cat_0,Euc_pred_cat_1,Euc_pred_cat_2)
attach(mydata)


#######################
## Correctly Classified

# Euclidean
Euc_pred_correct <- vector(mode="logical", length=nrow(mydata))
mydata <- cbind(mydata,Euc_pred_correct)
Euc_pred_correct[(Euc_pred_cat_0 == 1 & Dyad_Type == 0) | (Euc_pred_cat_1 == 1 & Dyad_Type == 1) | (Euc_pred_cat_2 == 2 & Dyad_Type == 2)] <- TRUE
Euc_correct_cat <- sum(Euc_pred_correct) / nrow(mydata)



######################################
## Sensitivity and Pos Predict Value 0

# Euclidean
Euc_pred_0_correct <- vector(mode="logical", length=nrow(mydata))
mydata <- cbind(mydata,Euc_pred_0_correct)
Euc_pred_0_correct[Euc_pred_cat_0 == 1 & Dyad_Type == 0] <- TRUE
Euc_pos_pred_val_0 <- sum(Euc_pred_0_correct) / sum(Euc_pred_cat_0)
Euc_sensitivity_0  <- sum(Euc_pred_0_correct) / sum(Dyad_Type==0)



######################################
## Sensitivity and Pos Predict Value 1

# Euclidean
Euc_pred_1_correct <- vector(mode="logical", length=nrow(mydata))
mydata <- cbind(mydata,Euc_pred_1_correct)
Euc_pred_1_correct[Euc_pred_cat_1 == 1 & Dyad_Type == 1] <- TRUE
Euc_pos_pred_val_1 <- sum(Euc_pred_1_correct) / sum(Euc_pred_cat_1)
Euc_sensitivity_1  <- sum(Euc_pred_1_correct) / sum(Dyad_Type==1)



######################################
## Sensitivity and Pos Predict Value 2

# Euclidean
Euc_pred_2_correct <- vector(mode="logical", length=nrow(mydata))
mydata <- cbind(mydata,Euc_pred_2_correct)
Euc_pred_2_correct[Euc_pred_cat_2 == 1 & Dyad_Type == 2] <- TRUE
Euc_pos_pred_val_2 <- sum(Euc_pred_2_correct) / sum(Euc_pred_cat_2)
Euc_sensitivity_2  <- sum(Euc_pred_2_correct) / sum(Dyad_Type==2)


#######################################
## Specificity and Neg Predict Value ~0

# Euclidean
Euc_pred_not0_correct <- vector(mode="logical", length=nrow(mydata))
mydata <- cbind(mydata,Euc_pred_not0_correct)
Euc_pred_not0_correct[Euc_pred_cat_0 != 1 & Dyad_Type != 0] <- TRUE
Euc_neg_pred_val_0 <- sum(Euc_pred_not0_correct) / (sum(Euc_pred_cat_1) + sum(Euc_pred_cat_2))
Euc_specificity_0  <- (sum(Euc_pred_1_correct) + sum(Euc_pred_2_correct)) / sum(Dyad_Type != 0)


#######################################
## Specificity and Neg Predict Value ~1

# Euclidean
Euc_pred_not1_correct <- vector(mode="logical", length=nrow(mydata))
mydata <- cbind(mydata,Euc_pred_not1_correct)
Euc_pred_not1_correct[Euc_pred_cat_1 != 1 & Dyad_Type != 1] <- TRUE
Euc_neg_pred_val_1 <- sum(Euc_pred_not1_correct) / (sum(Euc_pred_cat_0) + sum(Euc_pred_cat_2))
Euc_specificity_1  <- (sum(Euc_pred_0_correct) + sum(Euc_pred_2_correct)) / sum(Dyad_Type != 1)



###############################################
## Specificity Predict and Neg Predict Value ~2

# Euclidean
Euc_pred_not2_correct <- vector(mode="logical", length=nrow(mydata))
mydata <- cbind(mydata,Euc_pred_not2_correct)
Euc_pred_not2_correct[Euc_pred_cat_2 != 1 & Dyad_Type != 2] <- TRUE
Euc_neg_pred_val_2 <- sum(Euc_pred_not2_correct) / (sum(Euc_pred_cat_0) + sum(Euc_pred_cat_1))
Euc_specificity_2  <- (sum(Euc_pred_0_correct) + sum(Euc_pred_1_correct)) / sum(Dyad_Type != 2)


### printing tables in latex

out.table <- data.frame(0, 0, 0, 0)
colnames(out.table) <- c("Performance Measure", "Type 0", "Type 1", "Type 2")
out.table[1,] <- c("Sensitivity", Euc_sensitivity_0, Euc_sensitivity_1, Euc_sensitivity_2)
out.table[2,] <- c("Specificity", Euc_specificity_0, Euc_specificity_1, Euc_specificity_2)
out.table[3,] <- c("Pos. Pred. Value", Euc_pos_pred_val_0, Euc_pos_pred_val_1, Euc_pos_pred_val_2)
out.table[4,] <- c("Neg. Pred. Value", Euc_neg_pred_val_0, Euc_neg_pred_val_1, Euc_neg_pred_val_2)

out.table[,2] <- as.numeric(out.table[,2])*100
out.table[,3] <- as.numeric(out.table[,3])*100
out.table[,4] <- as.numeric(out.table[,4])*100

xtable(out.table, digits=2)


### printing tables in latex
out.table2 <- data.frame(0, 0, 0, 0, 0)
colnames(out.table2) <- c("", "Type 0", "Type 1", "Type 2", "Total")
out.table2[1,1:4] <- c("Type 0", length(which(Euc_pred_cat_0==1 & mydata$Dyad_Type==0)), length(which(Euc_pred_cat_1==1 & mydata$Dyad_Type==0)), length(which(Euc_pred_cat_2==1 & mydata$Dyad_Type==0)))
out.table2[2,1:4] <- c("Type 1", length(which(Euc_pred_cat_0==1 & mydata$Dyad_Type==1)), length(which(Euc_pred_cat_1==1 & mydata$Dyad_Type==1)), length(which(Euc_pred_cat_2==1 & mydata$Dyad_Type==1)))
out.table2[3,1:4] <- c("Type 2", length(which(Euc_pred_cat_0==1 & mydata$Dyad_Type==2)), length(which(Euc_pred_cat_1==1 & mydata$Dyad_Type==2)), length(which(Euc_pred_cat_2==1 & mydata$Dyad_Type==2)))

out.table2[,2] <- as.numeric(out.table2[,2])
out.table2[,3] <- as.numeric(out.table2[,3])
out.table2[,4] <- as.numeric(out.table2[,4])

out.table2[4,1:4] <- c("Total", sum(out.table2[1:3,2]), sum(out.table2[1:3,3]), sum(out.table2[1:3,4]))
out.table2[1:3,5] <- c(sum(as.numeric(out.table2[1,2:4])), sum(as.numeric(out.table2[2,2:4])), sum(as.numeric(out.table2[3,2:4])))
out.table2[4,5] <- sum(as.numeric(out.table2[4,2:4]))

xtable(out.table2)




